data_dir = "../../data/Zheng_2021/"
tcr_beta = fread("../../data/Zheng_2021_processed/TCR_beta.txt.gz")
dim(tcr_beta)## [1] 143559 18
tcr_beta[1:2,]## barcode is_cell high_confidence length chain v_gene d_gene
## 1: AAACCTGAGAAACCGC.60 TRUE TRUE 706 TRB TRBV20-1 TRBD1
## 2: AAACCTGAGAACTGTA.2 TRUE TRUE 655 TRB TRBV30 TRBD2
## j_gene c_gene full_length productive cdr3
## 1: TRBJ2-7 TRBC2 TRUE TRUE CSASIGGGLRLGEQYF
## 2: TRBJ1-1 TRBC1 TRUE TRUE CAWSPGTVNTEAFF
## cdr3_nt reads umis library.id
## 1: TGCAGTGCTAGCATAGGGGGGGGTTTAAGGCTGGGCGAGCAGTACTTC 9872 18 THCA-P20190122-N
## 2: TGTGCCTGGAGTCCCGGGACGGTGAACACTGAAGCTTTCTTT 25215 29 BC-P20190403-T
## cdr3_length key
## 1: 16 THCA-P20190122-N:AAACCTGAGAAACCGC.60
## 2: 14 BC-P20190403-T:AAACCTGAGAACTGTA.2
These are lung cancer specific signatures.
CD4_Hanada = scan(file="../../data/Hanada_2022/CD4_signature.txt",
what=character(0))
CD4_Hanada## [1] "CXCL13+" "NR3C1+" "ADGRG1+" "NMG+" "ITM2A+" "ETV7+" "COTL1+"
## [8] "B2M+" "IGFL2+" "VIM-" "MT-ND1-" "MALAT1-" "EMP3-"
Hanada = list(CD4=gsub("(\\+|-)$", "", CD4_Hanada),
CD4_sign=str_extract(CD4_Hanada, pattern="(\\+|-)$"))
Hanada## $CD4
## [1] "CXCL13" "NR3C1" "ADGRG1" "NMG" "ITM2A" "ETV7" "COTL1" "B2M"
## [9] "IGFL2" "VIM" "MT-ND1" "MALAT1" "EMP3"
##
## $CD4_sign
## [1] "+" "+" "+" "+" "+" "+" "+" "+" "+" "-" "-" "-" "-"
Lowery et al. (2022) not only generated their own gene signatures, but also compiled a long list of signatures from other studies. Here we first check their own signatures.
clean_list <- function(x){
for(i in 1:length(x)){
x[[i]] = as.character(na.omit(x[[i]]))
}
x
}
sig_Lowery = read_excel("../../data/Lowery_2022/science.abl5447_table_s4.xlsx",
sheet="Signatures from this study")
dim(sig_Lowery)## [1] 243 14
sig_Lowery[1:2,]## # A tibble: 2 × 14
## `Cluster 0` `Cluster 1` `Cluster 2` `Cluster 3` `Cluster 4` `Cluster 5`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 RPS4Y1 CXCL13 LGALS1 CREM ZNF683 EGR1
## 2 TUBA1B TIGIT ANXA1 SNHG12 XCL2 DUSP2
## # … with 8 more variables: Cluster 6 <chr>, Cluster 7 <chr>, Cluster 8 <chr>,
## # Cluster 9 <chr>, Cluster 10 <chr>, Cluster 11 <chr>, NeoTCR4 <chr>,
## # NeoTCR8 <chr>
Lowery = list(CD4 = sig_Lowery$NeoTCR4)
Lowery = clean_list(Lowery)
sapply(Lowery, length)## CD4
## 40
NeoT4 = read_excel("../../data/Lowery_2022/science.abl5447_table_s8.xlsx",
sheet="NeoTCR4")
dim(NeoT4)## [1] 111 7
NeoT4[1:2,]## # A tibble: 2 × 7
## Gene p_val avg_logFC pct.1 pct.2 p_val_adj cluster
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 CXCL13 6.13e-137 1.13 0.706 0.182 8.87e-133 CD4_NeoTCR
## 2 HMOX1 2.17e- 57 0.877 0.288 0.065 3.14e- 53 CD4_NeoTCR
table(NeoT4$avg_logFC < 0, NeoT4$p_val_adj < 0.05)##
## FALSE TRUE
## FALSE 1 61
## TRUE 12 37
table(Lowery$CD4 %in% NeoT4$Gene)##
## TRUE
## 40
wmat = match(Lowery$CD4, NeoT4$Gene)
max(NeoT4$p_val_adj[wmat])## [1] 0.02022774
w4 = NeoT4$avg_logFC < 0 & NeoT4$p_val_adj < 0.05
Lowery_negative = list(CD4 = NeoT4$Gene[w4])
sapply(Lowery_negative, length)## CD4
## 37
Next check the signatures from other studies
sigs = read_excel("../../data/Lowery_2022/science.abl5447_table_s4.xlsx",
sheet="Published signatures")
dim(sigs)## [1] 108 107
sigs[1:2,]## # A tibble: 2 × 107
## Krishna.ACT.Stem.Like Krishna.ACT.Term.Di… Li.CD8.DYS TOX_Scott Wu.8EFF Wu.8EM
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 KLF2 ENTPD1 LAG3 IGKC GNLY GZMK
## 2 RASA3 CD69 HAVCR2 AVIL NKG7 CCL4
## # … with 101 more variables: Wu.8Trm.1 <chr>, Wu.8Trm.2 <chr>, Wu.8Trm.3 <chr>,
## # Wu.8chrom <chr>, Wu.8Mit <chr>, Wu.8KLRB1 <chr>, Oh.CD8.CD39 <chr>,
## # Oh.CD8.Naive <chr>, Oh.CD8.HSP <chr>, Oh.CD8.MAIT <chr>,
## # Oh.CD8.FGFBP2 <chr>, Oh.CD8.RPL <chr>, Oh.CD8.MITO <chr>, Oh.CD8.XCL <chr>,
## # Oh.CD8.CM <chr>, Oh.CD8.PRO <chr>, Mel_Exhaust_Tirosh <chr>,
## # CD8_G_Feldman <chr>, CD8_B_Feldman <chr>, Exhaust_1_Feldman <chr>,
## # Exhaust_2_Feldman <chr>, Exhaust_3_Feldman <chr>, …
aucs = read_excel("../../data/Lowery_2022/science.abl5447_table_s12.xlsx")
dim(aucs)## [1] 108 6
aucs[1:2,]## # A tibble: 2 × 6
## `Gene Signatures` `CD4 training` `CD8 training` `CD4 validation`
## <chr> <dbl> <dbl> <dbl>
## 1 B16_PROG.EX_Miller..40g. 0.404 0.345 0.384
## 2 B16_TERMINAL.EX_Miller..27g. 0.486 0.75 0.484
## # … with 2 more variables: CD8 validation <dbl>, Public viral TCRs <dbl>
order.train = order(aucs$`CD4 training`, decreasing=TRUE)[1:5]
order.valid = order(aucs$`CD4 validation`, decreasing=TRUE)[1:5]
aucs[order.train,]## # A tibble: 5 × 6
## `Gene Signatures` `CD4 training` `CD8 training` `CD4 validation`
## <chr> <dbl> <dbl> <dbl>
## 1 NeoTCR4.40..40g. 0.902 0.802 0.842
## 2 Jansen_Term.diff..73g. 0.866 0.784 0.813
## 3 Caushi.CD4.Tfh.2...66g. 0.844 0.74 0.819
## 4 Cluster.1..50g. 0.832 0.561 0.743
## 5 Oh.CD4.CXCL13..49g. 0.827 0.725 0.824
## # … with 2 more variables: CD8 validation <dbl>, Public viral TCRs <dbl>
aucs[order.valid,]## # A tibble: 5 × 6
## `Gene Signatures` `CD4 training` `CD8 training` `CD4 validation`
## <chr> <dbl> <dbl> <dbl>
## 1 NeoTCR4.40..40g. 0.902 0.802 0.842
## 2 Oh.CD4.CXCL13..49g. 0.827 0.725 0.824
## 3 Caushi.CD4.Tfh.2...66g. 0.844 0.74 0.819
## 4 Jansen_Term.diff..73g. 0.866 0.784 0.813
## 5 CD8_B_Feldman..50g. 0.736 0.755 0.773
## # … with 2 more variables: CD8 validation <dbl>, Public viral TCRs <dbl>
CD4.sigs = intersect(aucs$`Gene Signatures`[order.train],
aucs$`Gene Signatures`[order.valid])
CD4.sigs## [1] "NeoTCR4.40..40g." "Jansen_Term.diff..73g."
## [3] "Caushi.CD4.Tfh.2...66g." "Oh.CD4.CXCL13..49g."
sapply(sigs[grep("Oh.CD4", names(sigs))], function(x){length(na.omit(x))})## Oh.CD4.CM Oh.CD4IL2RAHI Oh.CD4IL2RALO Oh.CD4.Activate Oh.CD4.GZMB
## 50 50 50 50 50
## Oh.CD4.GZMK Oh.CD4.TH17 Oh.CD4.CXCL13 Oh.CD4.MITO Oh.CD4.HSP
## 50 50 50 14 50
## Oh.CD4.PROLIF
## 50
sigs_CD4 = list(Lowery.CD4.neg.37g = Lowery_negative[["CD4"]],
Jansen_Term.diff.73g = sigs[["Jansen_Term diff"]],
Caushi.CD4.Tfh.2.66g = sigs[["Caushi.CD4-Tfh(2)"]],
Oh.CD4.CXCL13.50g = sigs[["Oh.CD4.CXCL13"]],
Lowery.CD4.40g = Lowery$CD4)
sigs_CD4 = clean_list(sigs_CD4)
sapply(sigs_CD4, length)## Lowery.CD4.neg.37g Jansen_Term.diff.73g Caushi.CD4.Tfh.2.66g
## 37 73 66
## Oh.CD4.CXCL13.50g Lowery.CD4.40g
## 50 40
cd4T_files = list.files(data_dir, pattern="10X.CD4")
cd4T_files## [1] "GSE156728_BC_10X.CD4.counts.txt.gz"
## [2] "GSE156728_BCL_10X.CD4.counts.txt.gz"
## [3] "GSE156728_ESCA_10X.CD4.counts.txt.gz"
## [4] "GSE156728_MM_10X.CD4.counts.txt.gz"
## [5] "GSE156728_PACA_10X.CD4.counts.txt.gz"
## [6] "GSE156728_RC_10X.CD4.counts.txt.gz"
## [7] "GSE156728_THCA_10X.CD4.counts.txt.gz"
## [8] "GSE156728_UCEC_10X.CD4.counts.txt.gz"
for(f1 in cd4T_files){
d1 = fread(file.path(data_dir, f1))
stopifnot(anyDuplicated(names(d1)) == 0)
print(sprintf("file: %s, dimension: (%d,%d)", f1, nrow(d1), ncol(d1)))
Tcells = which(names(d1) %in% tcr_beta$barcode)
dT = data.matrix(d1[,..Tcells])
rownames(dT) = d1$V1
print(sprintf("%d/%d cells with TCR", length(Tcells), ncol(d1)))
gs = list()
for(s1 in names(sigs_CD4)){
match.gene = na.omit(match(sigs_CD4[[s1]], d1$V1))
print(sprintf("%s: %d/%d genes are found.",
s1, length(match.gene), length(sigs_CD4[[s1]])))
gs[[s1]] = d1$V1[match.gene]
}
sce = SingleCellExperiment(assays=list(counts=dT))
sce = logNormCounts(sce)
sce
gsva.es = gsva(counts(sce), gs, method="ssgsea", verbose=FALSE)
stopifnot(all(colnames(gsva.es) == colnames(sce)))
# some signature genes appear multiple times
sig_genes = unlist(gs[-1])
print("the frequency of sigature genes:")
print(table(table(sig_genes)))
mat1 = match(sig_genes, rownames(sce))
stopifnot(all(! is.na(mat1)))
mat2 = match(Hanada$CD4[which(Hanada$CD4_sign == "+")], rownames(sce))
mat2 = na.omit(mat2)
mat3 = match(Hanada$CD4[which(Hanada$CD4_sign == "-")], rownames(sce))
mat3 = na.omit(mat3)
print(sprintf("%d/%d genes are found for Hanada postive/negative signatures",
length(mat2), length(mat3)))
ave_combined = colMeans(logcounts(sce)[mat1,])
ave_Hanada = colMeans(logcounts(sce)[mat2,])
ave_Hanada_neg = colMeans(logcounts(sce)[mat3,])
sigs = data.frame(ave_Hanada_neg, t(gsva.es)[,1],
ave_Hanada, ave_combined, t(gsva.es)[,-1])
names(sigs)[2] = rownames(gsva.es)[1]
dim(sigs)
sigs[1:2,]
sigs = scale(sigs)
gc = ggcorrplot(cor(sigs), lab = TRUE)
print(gc)
unq_genes = unique(c(unlist(gs), rownames(sce)[mat2], rownames(sce)[mat3]))
print("number of genes used for PCA:")
print(length(unq_genes))
stopifnot(all(unq_genes %in% d1$V1))
match.gene = match(unq_genes, d1$V1)
sce = runPCA(sce, ncomponents=20, subset_row=match.gene)
set.seed(100100)
sce = runUMAP(sce, dimred="PCA")
clust = clusterCells(sce, use.dimred="PCA",
BLUSPARAM=NNGraphParam(cluster.fun="louvain"))
colData(sce)$louvain = clust
default_palette = scales::hue_pal()(length(unique(clust)))
g1 = plotUMAP(sce, colour_by=I(colData(sce)$louvain),
point_size=0.6) +
guides(color = guide_legend(override.aes = list(size = 2))) +
scale_color_manual(values = default_palette)
print(g1)
colData(sce)[["ssGSEA"]] = colMeans(gsva.es[-1,])
colData(sce)[["positive"]] = rowMeans(sigs[,-(1:2)])
colData(sce)[["negative"]] = rowMeans(sigs[,(1:2)])
colData(sce) = cbind(colData(sce), reducedDim(sce, "UMAP"))
df = as.data.frame(colData(sce))
stopifnot(ncol(df) == 7)
names(df)[6:7] = c("UMAP1", "UMAP2")
g2 = ggplot(df, aes(x=louvain, y=ssGSEA, fill=louvain)) +
geom_boxplot() +
scale_fill_manual(values = default_palette)
g3 = ggplot(df, aes(x=louvain, y=positive, fill=louvain)) +
geom_boxplot() +
scale_color_manual(values = default_palette)
g4 = ggplot(df, aes(x=louvain, y=negative, fill=louvain)) +
geom_boxplot() +
scale_color_manual(values = default_palette)
ga1 = ggarrange(g2, g3, g4, ncol = 3, nrow = 1, legend="top",
common.legend=TRUE)
print(ga1)
ssGSEA_med = tapply(df$ssGSEA, df$louvain, median)
df2 = data.frame(louvain = as.factor(as.numeric(names(ssGSEA_med))),
ssGSEA = ssGSEA_med,
positive = tapply(df$positive, df$louvain, median),
negative = tapply(df$negative, df$louvain, median))
g5 = ggplot(df2, aes(x=ssGSEA, y=positive, color=louvain)) +
geom_point(size=3) +
scale_color_manual(values = default_palette)
g6 = ggplot(df2, aes(x=positive, y=negative, color=louvain)) +
geom_point(size=3) +
scale_color_manual(values = default_palette)
ga2 = ggarrange(g5, g6, ncol = 2, nrow = 1, legend="top",
common.legend=TRUE)
print(ga2)
fnm = sprintf("../../data/Zheng_2021_processed/%s.txt",
gsub(".counts.txt.gz", "", f1))
df$cell = colnames(sce)
fwrite(df, file=fnm, sep="\t")
system(sprintf("gzip -f %s", fnm))
print(sprintf("Done with %s", f1))
print(date())
print("------------------------------------------------------------------")
}## [1] "file: GSE156728_BC_10X.CD4.counts.txt.gz, dimension: (24148,3064)"
## [1] "2497/3064 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 6548 genes with constant expression
## values throuhgout the samples.
## [1] "the frequency of sigature genes:"
##
## 1 2 3 4
## 150 27 5 1
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## [1] "Done with GSE156728_BC_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 17:14:09 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_BCL_10X.CD4.counts.txt.gz, dimension: (28855,4238)"
## [1] "3491/4238 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 11462 genes with constant expression
## values throuhgout the samples.
## [1] "the frequency of sigature genes:"
##
## 1 2 3 4
## 150 27 5 1
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## [1] "Done with GSE156728_BCL_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 17:17:00 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_ESCA_10X.CD4.counts.txt.gz, dimension: (24148,12359)"
## [1] "9578/12359 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 3681 genes with constant expression
## values throuhgout the samples.
## [1] "the frequency of sigature genes:"
##
## 1 2 3 4
## 150 27 5 1
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## [1] "Done with GSE156728_ESCA_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:22:43 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_MM_10X.CD4.counts.txt.gz, dimension: (28855,3646)"
## [1] "2911/3646 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 11374 genes with constant expression
## values throuhgout the samples.
## [1] "the frequency of sigature genes:"
##
## 1 2 3 4
## 150 27 5 1
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## [1] "Done with GSE156728_MM_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:23:50 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_PACA_10X.CD4.counts.txt.gz, dimension: (24148,3904)"
## [1] "3167/3904 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 6126 genes with constant expression
## values throuhgout the samples.
## [1] "the frequency of sigature genes:"
##
## 1 2 3 4
## 150 27 5 1
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## [1] "Done with GSE156728_PACA_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:24:46 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_RC_10X.CD4.counts.txt.gz, dimension: (24148,10106)"
## [1] "8444/10106 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 4874 genes with constant expression
## values throuhgout the samples.
## [1] "the frequency of sigature genes:"
##
## 1 2 3 4
## 150 27 5 1
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## [1] "Done with GSE156728_RC_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:27:18 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_THCA_10X.CD4.counts.txt.gz, dimension: (24148,23509)"
## [1] "18514/23509 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 2980 genes with constant expression
## values throuhgout the samples.
## [1] "the frequency of sigature genes:"
##
## 1 2 3 4
## 150 27 5 1
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## [1] "Done with GSE156728_THCA_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:33:15 2022"
## [1] "------------------------------------------------------------------"
## [1] "file: GSE156728_UCEC_10X.CD4.counts.txt.gz, dimension: (24148,12730)"
## [1] "10147/12730 cells with TCR"
## [1] "Lowery.CD4.neg.37g: 36/37 genes are found."
## [1] "Jansen_Term.diff.73g: 71/73 genes are found."
## [1] "Caushi.CD4.Tfh.2.66g: 63/66 genes are found."
## [1] "Oh.CD4.CXCL13.50g: 50/50 genes are found."
## [1] "Lowery.CD4.40g: 39/40 genes are found."
## Warning in .filterFeatures(expr, method): 4112 genes with constant expression
## values throuhgout the samples.
## [1] "the frequency of sigature genes:"
##
## 1 2 3 4
## 150 27 5 1
## [1] "8/4 genes are found for Hanada postive/negative signatures"
## [1] "number of genes used for PCA:"
## [1] 223
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.
## [1] "Done with GSE156728_UCEC_10X.CD4.counts.txt.gz"
## [1] "Fri Apr 29 18:38:20 2022"
## [1] "------------------------------------------------------------------"
gc()## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 9373640 500.7 17113998 914 NA 17113998 914.0
## Vcells 632508139 4825.7 1825172888 13925 32768 2281466103 17406.3
sessionInfo()## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] ggcorrplot_0.1.3 Rtsne_0.15
## [3] svd_0.5 bluster_1.4.0
## [5] cluster_2.1.2 scran_1.22.1
## [7] scater_1.22.0 scuttle_1.4.0
## [9] SingleCellExperiment_1.16.0 SummarizedExperiment_1.24.0
## [11] Biobase_2.54.0 GenomicRanges_1.46.1
## [13] GenomeInfoDb_1.30.0 IRanges_2.28.0
## [15] S4Vectors_0.32.3 BiocGenerics_0.40.0
## [17] MatrixGenerics_1.6.0 matrixStats_0.61.0
## [19] GSVA_1.42.0 readxl_1.3.1
## [21] stringr_1.4.0 ggpubr_0.4.0
## [23] ggplot2_3.3.5 Matrix_1.3-4
## [25] data.table_1.14.2
##
## loaded via a namespace (and not attached):
## [1] backports_1.4.0 plyr_1.8.6
## [3] igraph_1.2.9 GSEABase_1.56.0
## [5] BiocParallel_1.28.2 digest_0.6.29
## [7] htmltools_0.5.2 viridis_0.6.2
## [9] fansi_0.5.0 magrittr_2.0.1
## [11] memoise_2.0.1 ScaledMatrix_1.2.0
## [13] limma_3.50.0 Biostrings_2.62.0
## [15] annotate_1.72.0 R.utils_2.11.0
## [17] colorspace_2.0-2 blob_1.2.2
## [19] ggrepel_0.9.1 xfun_0.28
## [21] dplyr_1.0.7 crayon_1.4.2
## [23] RCurl_1.98-1.5 jsonlite_1.7.2
## [25] graph_1.72.0 glue_1.5.1
## [27] gtable_0.3.0 zlibbioc_1.40.0
## [29] XVector_0.34.0 DelayedArray_0.20.0
## [31] car_3.0-12 BiocSingular_1.10.0
## [33] Rhdf5lib_1.16.0 HDF5Array_1.22.1
## [35] abind_1.4-5 scales_1.1.1
## [37] DBI_1.1.1 edgeR_3.36.0
## [39] rstatix_0.7.0 Rcpp_1.0.7
## [41] viridisLite_0.4.0 xtable_1.8-4
## [43] dqrng_0.3.0 bit_4.0.4
## [45] rsvd_1.0.5 metapod_1.2.0
## [47] httr_1.4.2 FNN_1.1.3
## [49] ellipsis_0.3.2 pkgconfig_2.0.3
## [51] XML_3.99-0.8 R.methodsS3_1.8.1
## [53] farver_2.1.0 sass_0.4.0
## [55] uwot_0.1.10 locfit_1.5-9.4
## [57] utf8_1.2.2 tidyselect_1.1.1
## [59] labeling_0.4.2 rlang_0.4.12
## [61] reshape2_1.4.4 AnnotationDbi_1.56.2
## [63] munsell_0.5.0 cellranger_1.1.0
## [65] tools_4.1.2 cachem_1.0.6
## [67] cli_3.1.0 generics_0.1.1
## [69] RSQLite_2.2.8 broom_0.7.10
## [71] evaluate_0.14 fastmap_1.1.0
## [73] yaml_2.2.1 knitr_1.36
## [75] bit64_4.0.5 purrr_0.3.4
## [77] KEGGREST_1.34.0 sparseMatrixStats_1.6.0
## [79] R.oo_1.24.0 compiler_4.1.2
## [81] rstudioapi_0.13 beeswarm_0.4.0
## [83] png_0.1-7 ggsignif_0.6.3
## [85] tibble_3.1.6 statmod_1.4.36
## [87] bslib_0.3.1 stringi_1.7.6
## [89] highr_0.9 RSpectra_0.16-0
## [91] lattice_0.20-45 vctrs_0.3.8
## [93] pillar_1.6.4 lifecycle_1.0.1
## [95] rhdf5filters_1.6.0 jquerylib_0.1.4
## [97] RcppAnnoy_0.0.19 BiocNeighbors_1.12.0
## [99] cowplot_1.1.1 bitops_1.0-7
## [101] irlba_2.3.3 R6_2.5.1
## [103] gridExtra_2.3 vipor_0.4.5
## [105] codetools_0.2-18 assertthat_0.2.1
## [107] rhdf5_2.38.0 withr_2.4.3
## [109] GenomeInfoDbData_1.2.7 parallel_4.1.2
## [111] grid_4.1.2 beachmat_2.10.0
## [113] tidyr_1.1.4 rmarkdown_2.11
## [115] DelayedMatrixStats_1.16.0 carData_3.0-4
## [117] ggbeeswarm_0.6.0